###############################################################################
### Replication code for:
###“Party or Policy? The Role of Policy Partisanship in Voter Decision-Making”

### Authors: Clareta Treger, Thomas Galipeau, Thomas Bergeron, Sarah Lachance, 
### Natasha Goel, Md Mujahedul Islam, Blake Lee-Whiting, Beatrice Magistro 
### and Peter John Loewen

### Journal: Political Behavior

### Last update: February 19, 2026
###############################################################################

###############################################################################
### Main analysis ###
###############################################################################

#load the libraries

library(xtable)
library(tidyverse)
library(ggplot2)
library(jtools)
library(ggeffects)
library(estimatr)
library(grid)
library(scales)
library(ggpubr)
library(car)
library(reshape2)
library(marginaleffects)
library(stats)
library(modelsummary)
library(dplyr)
library(stringr)
library(tinytable)
library(insight)
library(fastDummies)
library(pander)
library(haven)
library(tidyr)
library(officer)
library(flextable)

#custom function to compare marginal contrasts
compare_contrasts <- function(contrast_df, estimate_col = "avg_estimate", se_col = "avg_se", contrast_col = "contrast") {
  # Get column values
  ests <- contrast_df[[estimate_col]]
  ses <- contrast_df[[se_col]]
  labels <- contrast_df[[contrast_col]]
  
  # All unique row combinations
  combs <- combn(seq_along(ests), 2)
  
  # Compute stats
  pairwise <- apply(combs, 2, function(idx) {
    i <- idx[1]; j <- idx[2]
    diff <- ests[i] - ests[j]
    se_diff <- sqrt(ses[i]^2 + ses[j]^2)
    z <- diff / se_diff
    p <- 2 * (1 - pnorm(abs(z)))
    
    tibble::tibble(
      contrast1 = labels[i],
      contrast2 = labels[j],
      diff = diff,
      se_diff = se_diff,
      z = z,
      p = p
    )
  })
  
  # Return as a data frame
  dplyr::bind_rows(pairwise) |> dplyr::arrange(p)
}

### set working directory

#setwd(" ")

### Load data

dat3 <- read.csv("main_data_party_or_policy.csv")

#making sure the correct levels are applied for the tables
dat3 <- dat3 %>%
  mutate(across(
    c(cong_PID, cong_cc, cong_in, cong_co, cong_de, cong_ho, cong_he), 
    ~ {
      x <- as.factor(.)
      # Identify the level with the prefix
      ref_level <- str_subset(levels(x), "Non-congruent") 
      relevel(x, ref = ref_level)
    }
  ))


### Figure 1: Distribution of Policy Partisanship 

p_desc_partisan2 <- ggplot(dat3, aes(sum_partisan2)) +
  geom_bar(aes(y=..count../sum(..count..))) +
  scale_y_continuous(labels=percent_format()) +
  labs(x= "Number of Partisan Policies", y = "Percentage of Respondents") +
  theme_bw() +
  theme(axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        axis.title.x = element_text(size = 13),
        axis.title.y = element_text(size = 13)) 

print(p_desc_partisan2)

#try(ggsave("", p_desc_partisan2, width = 6, height = 4, units = "in", dpi = 300))

### Figure 2: Distribution of Party Associations with Policies by Issue and Policy Ideology

## Create a dataset with the frequencies of party assignment by policy
freq <- as.data.frame(prop.table(apply(dat3[, c("cc1", "cc2", "in1", "in2" ,"co1" ,"co2" ,"de1", "de2" ,"ho1" ,"ho2", "he1" ,"he2")], 2 , table), margin = 2))
freq$party <- row.names(freq)

## Reformat dataset wide to long
freq_long <- melt(freq, id.vars = c("party"), variable.name = "Policy")
freq_long$issue[1:8] <- "Climate"
freq_long$issue[9:16] <- "Income redistribution"
freq_long$issue[17:24] <- "Cost of Living"
freq_long$issue[25:32] <- "Deficit"
freq_long$issue[33:40] <- "Housing"
freq_long$issue[41:48] <- "Healthcare"

freq_long$Policy_Ideology[1:4] <- "Left"
freq_long$Policy_Ideology[5:8] <- "Right"
freq_long$Policy_Ideology[9:12] <- "Left"
freq_long$Policy_Ideology[13:16] <- "Right"
freq_long$Policy_Ideology[17:20] <- "Left"
freq_long$Policy_Ideology[21:24] <- "Right"
freq_long$Policy_Ideology[25:28] <- "Left"
freq_long$Policy_Ideology[29:32] <- "Right"
freq_long$Policy_Ideology[33:36] <- "Right"
freq_long$Policy_Ideology[37:40] <- "Left"
freq_long$Policy_Ideology[41:44] <- "Left"
freq_long$Policy_Ideology[45:48] <- "Right"

## Create a bar chart for Figure 2

p_bar <- freq_long %>% 
  mutate(
    party = factor(party, levels = rev(c("NDP", "Liberal", "Non-partisan", "Conservative"))),
    Policy_Ideology = case_when(
      Policy_Ideology == "Left" ~ "Left-leaning policies",
      Policy_Ideology == "Right" ~ "Right-leaning policies"
    )) %>% 
  ggplot(aes(x = str_wrap(issue,10), y = value, fill = party)) +
  geom_col(color = "white", linewidth = 0.4, width = 0.75) +
  geom_text(aes(label = paste0(round(value * 100, 0), "%")), 
            position = position_stack(vjust = 0.5), 
            size = 4, 
            color = "white", fontface = "bold") +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "gray40") +
  facet_wrap(~ Policy_Ideology, ncol = 1, strip.position = "top") +
  labs(
    fill = NULL, 
    title = NULL,
    y = NULL,
    x = NULL
  ) +
  coord_flip() +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank(),
    plot.caption = element_text(hjust = 0, size = 10, color = "gray30"),
    strip.text = element_text(size = 14, face = "bold", hjust = 0),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_fill_manual(
    values = c(
      "NDP" = "#f39200",         # softer orange
      "Liberal" = "#d73027",     # muted red
      "Non-partisan" = "#999999",
      "Conservative" = "#4575b4" # cooler blue
    )
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  guides(fill = guide_legend(nrow = 2, byrow = TRUE, reverse = TRUE))

print(p_bar)
#try(ggsave("", p_bar, width = 6, height = 7.5, units = "in", dpi = 300))


## Get the range of proportions across issues of respondents "who get it right"
# Right-wing policies
range(freq_long$value[freq_long$party=="Conservative" & freq_long$Policy_Ideology=="Right"])
mean(freq_long$value[freq_long$party=="Conservative" & freq_long$Policy_Ideology=="Right"])

# Left-wing policies
cc_left_correct <- freq_long$value[freq_long$Policy == "cc1" & freq_long$party=="Liberal"] + freq_long$value[freq_long$Policy == "cc1" & freq_long$party=="NDP"]
in_left_correct <- freq_long$value[freq_long$Policy == "in1" & freq_long$party=="Liberal"] + freq_long$value[freq_long$Policy == "in1" & freq_long$party=="NDP"]
co_left_correct <- freq_long$value[freq_long$Policy == "co1" & freq_long$party=="Liberal"] + freq_long$value[freq_long$Policy == "co1" & freq_long$party=="NDP"]
de_left_correct <- freq_long$value[freq_long$Policy == "de1" & freq_long$party=="Liberal"] + freq_long$value[freq_long$Policy == "de1" & freq_long$party=="NDP"]
ho_left_correct <- freq_long$value[freq_long$Policy == "ho1" & freq_long$party=="Liberal"] + freq_long$value[freq_long$Policy == "ho1" & freq_long$party=="NDP"]
he_left_correct <- freq_long$value[freq_long$Policy == "he1" & freq_long$party=="Liberal"] + freq_long$value[freq_long$Policy == "he1" & freq_long$party=="NDP"]

range(cc_left_correct, in_left_correct, co_left_correct, de_left_correct, ho_left_correct, he_left_correct)
mean(cc_left_correct, in_left_correct, co_left_correct, de_left_correct, ho_left_correct, he_left_correct)


### Table 2: Chapel Hill Expert Survey 2023 party scores on policy items 
### matching those included in the experiment (N=44)

#load data

dat <- read_dta("CHES_CA2023_expert-level.dta")

#create table
tab_ches <- dat %>%
  filter(party_id %in% 7001:7003) %>%
  mutate(party = factor(party_id,
                        levels = c(7001, 7002, 7003),
                        labels = c("Conservative", "Liberal", "NDP"))) %>%
  group_by(party) %>%
  summarise(
    redistribution = mean(redistribution, na.rm = TRUE),
    environment    = mean(environment,   na.rm = TRUE),
    spendvtax      = mean(spendvtax,     na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(cols = c(redistribution, environment, spendvtax),
               names_to = "item", values_to = "Mean") %>%
  mutate(`CHES Item` = case_when(
    item == "redistribution" ~ "Position on redistribution of wealth from the rich to the poor.\n[0 - Strongly favors redistribution; 10 - Strongly opposes redistribution]",
    item == "environment"    ~ "Position towards environmental sustainability\n[0 - Strongly supports environmental protection even at the cost of economic growth; 10 - Strongly supports economic growth even at the cost of environmental protection]",
    item == "spendvtax"      ~ "Position on improving public services vs. reducing taxes.\n[0 - Strongly favors improving public services; 10 - Strongly favors reducing taxes]"
  )) %>%
  select(`CHES Item`, party, Mean) %>%
  pivot_wider(names_from = party, values_from = Mean) %>%
  mutate(across(c(Conservative, Liberal, NDP), ~ round(.x, 2)))

print(tab_ches)

##Export to word 

ft <- flextable(tab) %>% autofit()

doc <- read_docx() %>%
  body_add_par("CHES Item", style = "heading 1") %>%
  body_add_flextable(ft)

print(doc, target = "CHES_party_positions.docx")

### Figure 3: Average Marginal Effect of Policy Congruence on Candidate Rating by 
### Implicit Party Cue (Control Group)

### Test Hypothesis H1  
## Regression (Table D1 in SI. *Also included in the appendix section D of this script)
#here we use robust standard errors clustered by respondent ID for the table
h1_cont_rate <- lm_robust(scale ~ d_gender + as.factor(d_experience) + f_cc2_2 + f_co2_2 + f_de2_2 + f_he2_2 + f_ho2_2 + f_in2_2 + PID + post_secondary + citizen + province + income + male,
                          dat = dat3[dat3$Treatment == "No partisanship",],
                          clusters = ResponseId) 

modelsummary(h1_cont_rate, stars = TRUE)

# Run a linear regression without robust standard errors because marginal effects() 
# does not accept lm_robust objects; instead computing robust standard errors 
# later with marginaleffects(..., vcov = ~ResponseId) 
lm_h1_cont_rate <- lm(scale ~ d_gender + d_experience + f_cc2_2 + f_co2_2 + f_de2_2 + f_he2_2 + f_ho2_2 + f_in2_2 + PID + post_secondary + citizen + province + income + male,
                      data = dat3[dat3$Treatment == "No partisanship",])

# Compute marginal effects using pairwise contrast between the levels of each 
# policy partisanship variable (total = 6; one for each issue)

# Climate change

# Compute marginal effects 
cc_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate, variables = list(f_cc2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
cc_comp_avg_robust <- cc_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

cc_comp <- cc_comp_avg_robust |> 
  filter(
    contrast == "Climate Change1: Non-congruent and non-partisan - Climate Change1: Congruent and non-partisan" |
      contrast == "Climate Change1: Non-congruent and partisan-matching - Climate Change1: Congruent and partisan-matching" | 
      contrast == "Climate Change1: Non-congruent and partisan-non-matching - Climate Change1: Congruent and partisan-non-matching")

cc_dif <- compare_contrasts(cc_comp)
View(cc_dif)

# Selecting the contrasts of interest
cc_contrast_npartisan <- -cc_comp_avg_robust$avg_estimate[4] # non-partisan (Climate Change1: Non-congruent and non-partisan - Climate Change1: Congruent and non-partisan)
cc_contrast_partisan_match <- -cc_comp_avg_robust$avg_estimate[8] # in-party: putting minus sign because non-congruent is first and we want the increase in support when congruent compare to non-congruent (congruent minus non-congruent); we need to switch signs
cc_contrast_partisan_nmatch <- -cc_comp_avg_robust$avg_estimate[13] # out-party

# Confidence intervals
cc_low_npartisan <- -cc_comp_avg_robust$conf.low[4]
cc_low_partisan_match <- -cc_comp_avg_robust$conf.low[8]
cc_low_partisan_nmatch <- -cc_comp_avg_robust$conf.low[13]

cc_hi_npartisan <- -cc_comp_avg_robust$conf.high[4]
cc_hi_partisan_match <- -cc_comp_avg_robust$conf.high[8]
cc_hi_partisan_nmatch <- -cc_comp_avg_robust$conf.high[13]

# Cost of Living # MAKE SURE THE ESTIMATES ARE THE GOOD ONES
co_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate, variables = list(f_co2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
co_comp_avg_robust <- co_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

co_comp <- co_comp_avg_robust |> 
  filter(
    contrast == "Cost of living1: Non-congruent and non-partisan - Cost of living1: Congruent and non-partisan" |
      contrast == "Cost of living1: Non-congruent and partisan-matching - Cost of living1: Congruent and partisan-matching" | 
      contrast == "Cost of living1: Non-congruent and partisan-non-matching - Cost of living1: Congruent and partisan-non-matching")

co_dif <- compare_contrasts(co_comp)
View(co_dif)

co_contrast_npartisan <- -co_comp_avg_robust$avg_estimate[4]
co_contrast_partisan_match <- -co_comp_avg_robust$avg_estimate[8]
co_contrast_partisan_nmatch <- -co_comp_avg_robust$avg_estimate[13]

co_low_npartisan <- -co_comp_avg_robust$conf.low[4]
co_low_partisan_match <- -co_comp_avg_robust$conf.low[8]
co_low_partisan_nmatch <- -co_comp_avg_robust$conf.low[13]

co_hi_npartisan <- -co_comp_avg_robust$conf.high[4]
co_hi_partisan_match <- -co_comp_avg_robust$conf.high[8]
co_hi_partisan_nmatch <- -co_comp_avg_robust$conf.high[13]

# Deficit
de_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate, variables = list(f_de2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
de_comp_avg_robust <- de_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

de_comp <- de_comp_avg_robust |> 
  filter(
    contrast == "Deficit1: Non-congruent and non-partisan - Deficit1: Congruent and non-partisan" |
      contrast == "Deficit1: Non-congruent and partisan-matching - Deficit1: Congruent and partisan-matching" | 
      contrast == "Deficit1: Non-congruent and partisan-non-matching - Deficit1: Congruent and partisan-non-matching")

de_dif <- compare_contrasts(de_comp)
View(de_dif)

de_contrast_npartisan <- -de_comp_avg_robust$avg_estimate[4]
de_contrast_partisan_match <- -de_comp_avg_robust$avg_estimate[8]
de_contrast_partisan_nmatch <- -de_comp_avg_robust$avg_estimate[13]

de_low_npartisan <- -de_comp_avg_robust$conf.low[4]
de_low_partisan_match <- -de_comp_avg_robust$conf.low[8]
de_low_partisan_nmatch <- -de_comp_avg_robust$conf.low[13]

de_hi_npartisan <- -de_comp_avg_robust$conf.high[4]
de_hi_partisan_match <- -de_comp_avg_robust$conf.high[8]
de_hi_partisan_nmatch <- -de_comp_avg_robust$conf.high[13]

# Healthcare
he_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate, variables = list(f_he2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
he_comp_avg_robust <- he_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

he_comp <- he_comp_avg_robust |> 
  filter(
    contrast == "Healthcare1: Non-congruent and non-partisan - Healthcare1: Congruent and non-partisan" |
      contrast == "Healthcare1: Non-congruent and partisan-matching - Healthcare1: Congruent and partisan-matching" | 
      contrast == "Healthcare1: Non-congruent and partisan-non-matching - Healthcare1: Congruent and partisan-non-matching")

he_dif <- compare_contrasts(he_comp)
View(he_dif)

he_contrast_npartisan <- -he_comp_avg_robust$avg_estimate[4]
he_contrast_partisan_match <- -he_comp_avg_robust$avg_estimate[8]
he_contrast_partisan_nmatch <- -he_comp_avg_robust$avg_estimate[13]

he_low_npartisan <- -he_comp_avg_robust$conf.low[4]
he_low_partisan_match <- -he_comp_avg_robust$conf.low[8]
he_low_partisan_nmatch <- -he_comp_avg_robust$conf.low[13]

he_hi_npartisan <- -he_comp_avg_robust$conf.high[4]
he_hi_partisan_match <- -he_comp_avg_robust$conf.high[8]
he_hi_partisan_nmatch <- -he_comp_avg_robust$conf.high[13]

# Income inequality
in_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate, variables = list(f_in2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
in_comp_avg_robust <-  in_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

in_comp <- in_comp_avg_robust |> 
  filter(
    contrast == "Income1: Non-congruent and non-partisan - Income1: Congruent and non-partisan" |
      contrast == "Income1: Non-congruent and partisan-matching - Income1: Congruent and partisan-matching" | 
      contrast == "Income1: Non-congruent and partisan-non-matching - Income1: Congruent and partisan-non-matching")

in_dif <- compare_contrasts(in_comp)
View(in_dif)

in_contrast_npartisan <- -in_comp_avg_robust$avg_estimate[4]
in_contrast_partisan_match <- -in_comp_avg_robust$avg_estimate[8]
in_contrast_partisan_nmatch <- -in_comp_avg_robust$avg_estimate[13]

in_low_npartisan <- -in_comp_avg_robust$conf.low[4]
in_low_partisan_match <- -in_comp_avg_robust$conf.low[8]
in_low_partisan_nmatch <- -in_comp_avg_robust$conf.low[13]

in_hi_npartisan <- -in_comp_avg_robust$conf.high[4]
in_hi_partisan_match <- -in_comp_avg_robust$conf.high[8]
in_hi_partisan_nmatch <- -in_comp_avg_robust$conf.high[13]

# Housing
ho_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate, variables = list(f_ho2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
ho_comp_avg_robust <- ho_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

ho_comp <- ho_comp_avg_robust |> 
  filter(
    contrast == "Housing1: Non-congruent and non-partisan - Housing1: Congruent and non-partisan" |
      contrast == "Housing1: Non-congruent and partisan-matching - Housing1: Congruent and partisan-matching" | 
      contrast == "Housing1: Non-congruent and partisan-non-matching - Housing1: Congruent and partisan-non-matching")

ho_dif <- compare_contrasts(ho_comp)
View(ho_dif)

ho_contrast_npartisan <- -ho_comp_avg_robust$avg_estimate[4]
ho_contrast_partisan_match <- -ho_comp_avg_robust$avg_estimate[8]
ho_contrast_partisan_nmatch <- -ho_comp_avg_robust$avg_estimate[13]

ho_low_npartisan <- -ho_comp_avg_robust$conf.low[4]
ho_low_partisan_match <- -ho_comp_avg_robust$conf.low[8]
ho_low_partisan_nmatch <- -ho_comp_avg_robust$conf.low[13]

ho_hi_npartisan <- -ho_comp_avg_robust$conf.high[4]
ho_hi_partisan_match <- -ho_comp_avg_robust$conf.high[8]
ho_hi_partisan_nmatch <- -ho_comp_avg_robust$conf.high[13]

#Create a dataset with the marginal contrasts for plotting
marginal_contrasts_cont <- data.frame(
  policy = c("cc", "co", "de", "he", "ho", "in"),
  contrast_npartisan = c(cc_contrast_npartisan, co_contrast_npartisan, de_contrast_npartisan, he_contrast_npartisan, ho_contrast_npartisan, in_contrast_npartisan),
  contrast_partisan_match = c(cc_contrast_partisan_match, co_contrast_partisan_match, de_contrast_partisan_match, he_contrast_partisan_match, ho_contrast_partisan_match, in_contrast_partisan_match),
  contrast_partisan_nmatch = c(cc_contrast_partisan_nmatch, co_contrast_partisan_nmatch, de_contrast_partisan_nmatch, he_contrast_partisan_nmatch, ho_contrast_partisan_nmatch, in_contrast_partisan_nmatch),
  low_npartisan = c(cc_low_npartisan, co_low_npartisan, de_low_npartisan, he_low_npartisan, ho_low_npartisan, in_low_npartisan),
  low_partisan_match = c(cc_low_partisan_match, co_low_partisan_match, de_low_partisan_match, he_low_partisan_match, ho_low_partisan_match, in_low_partisan_match),
  low_partisan_nmatch = c(cc_low_partisan_nmatch, co_low_partisan_nmatch, de_low_partisan_nmatch, he_low_partisan_nmatch, ho_low_partisan_nmatch, in_low_partisan_nmatch),
  hi_npartisan = c(cc_hi_npartisan, co_hi_npartisan, de_hi_npartisan, he_hi_npartisan, ho_hi_npartisan, in_hi_npartisan),
  hi_partisan_match = c(cc_hi_partisan_match, co_hi_partisan_match, de_hi_partisan_match, he_hi_partisan_match, ho_hi_partisan_match, in_hi_partisan_match),
  hi_partisan_nmatch = c(cc_hi_partisan_nmatch, co_hi_partisan_nmatch, de_hi_partisan_nmatch, he_hi_partisan_nmatch, ho_hi_partisan_nmatch, in_hi_partisan_nmatch)
)

# Plot marginal contrasts

# Create a new variable for dodging point estimates
marginal_contrasts_cont <- marginal_contrasts_cont %>%
  mutate(policy_dodged = ifelse(policy == "cc", 1,
                                ifelse(policy == "co", 2,
                                       ifelse(policy == "de", 3,
                                              ifelse(policy == "he", 4,
                                                     ifelse(policy == "ho", 5,
                                                            ifelse(policy == "in", 6, NA)))))))

long_data <- marginal_contrasts_cont %>%
  select(policy_dodged,
         `In-party` = contrast_partisan_match,
         `Non-partisan` = contrast_npartisan,
         `Out-party` = contrast_partisan_nmatch,
         low_in = low_partisan_match,
         hi_in = hi_partisan_match,
         low_np = low_npartisan,
         hi_np = hi_npartisan,
         low_out = low_partisan_nmatch,
         hi_out = hi_partisan_nmatch) %>%
  pivot_longer(cols = c(`In-party`, `Non-partisan`, `Out-party`),
               names_to = "Cue",
               values_to = "Estimate") %>%
  mutate(
    ymin = case_when(
      Cue == "In-party" ~ low_in,
      Cue == "Non-partisan" ~ low_np,
      Cue == "Out-party" ~ low_out
    ),
    ymax = case_when(
      Cue == "In-party" ~ hi_in,
      Cue == "Non-partisan" ~ hi_np,
      Cue == "Out-party" ~ hi_out
    )
  )

# Set dodge position
pd <- position_dodge(width = 0.6)
View(long_data)

# Plot Figure 3 
lm_cont <- ggplot(long_data, aes(x = factor(policy_dodged), y = Estimate, 
                                 color = Cue, 
                                 shape = Cue)) +
  geom_pointrange(aes(ymin = ymin, ymax = ymax), position = pd, size = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray30", linewidth = 0.5) +
  scale_color_manual(values = c(
    "In-party" = "black",       
    "Non-partisan" = "grey40",    
    "Out-party" = "grey60"      
  )) +
  scale_shape_manual(values = c(
    "In-party" = 16,
    "Non-partisan" = 17,
    "Out-party" = 15
  )) +
  scale_x_discrete(labels = c("Climate", "Cost of Living", "Deficit", "Healthcare", "Housing", "Income\n redistribution")) +
  ylim(-1.2, 1.2) +
  labs(
    y = "Average Marginal Effect of Congruence",
    x = NULL,
    color = "Implicit Party Cue",
    shape = "Implicit Party Cue"
  ) +
  theme_bw(base_size = 13) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 12),
    axis.text.y = element_text(size = 11),
    axis.title.y = element_text(size = 13),
    legend.position = "bottom",
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 11)
  )  
print(lm_cont)

#try(ggsave("", lm_cont, width = 8, height = 6, units = "in", dpi = 300))

### Table 3: Differences between the Effects of Policy Congruence Across Policy 
### Partisanship Categories (Control Group)

#use this to view the differences and to build table 3 in the article.
tab_3_differences <- bind_rows(cc_dif, co_dif, de_dif, he_dif, ho_dif, in_dif)
print(tab_3_differences)

## Figure 4: Average Marginal Effect of Policy Congruence and Other Conjoint 
### Attributes on Candidate Rating by Experimental Condition

###  Test Hypothesis H2

## Re-label for plot

#levels(dat3$Treatment) <- c("No partisanship", "Partisanship")

dat3 <- dat3 |>
  mutate(
    cong_cc = as.factor(cong_cc),
    cong_in = as.factor(cong_in),
    cong_co = as.factor(cong_co),
    cong_de = as.factor(cong_de),
    cong_ho = as.factor(cong_ho),
    cong_he = as.factor(cong_he),
    cong_PID = as.factor(cong_PID),
    cong_cc = relevel(cong_cc, ref = "ccNon-congruent"),
    cong_in = relevel(cong_in, ref = "inNon-congruent"),
    cong_co = relevel(cong_co, ref = "coNon-congruent"),
    cong_de = relevel(cong_de, ref = "deNon-congruent"),
    cong_ho = relevel(cong_ho, ref = "hoNon-congruent"),
    cong_he = relevel(cong_he, ref = "heNon-congruent"),
    cong_PID = relevel(cong_PID, ref = "Non-congruent partisanship")
  )

models <- list(
  "Treatment" = lm_robust(scale ~ d_gender + as.factor(d_experience) + cong_PID + cong_cc + cong_in + cong_co + cong_de + cong_ho + cong_he + PID + post_secondary + citizen + province + income + male,
                          data = dat3[dat3$Treatment == "Partisanship",],
                          clusters = ResponseId),
  "Control" = lm_robust(scale ~ d_gender + as.factor(d_experience) + cong_cc + cong_in + cong_co + cong_de + cong_ho + cong_he + PID + post_secondary + citizen + province + income + male,
                        data = dat3[dat3$Treatment == "No partisanship",],
                        clusters = ResponseId))

# Table E1: Regression of Candidate Evaluation on Policy Congruence and Other 
### attributes. Dependent variable: Candidate evaluation.

modelsummary(models, stars = TRUE)

# Extracting relevant data, filtering terms, and relabeling
plot_data <- lapply(names(models), function(group) {
  data.frame(
    group = group,
    term = names(models[[group]]$coefficients),
    estimate = models[[group]]$coefficients,
    std_error = models[[group]]$std.error,
    p_value = models[[group]]$p.value
  )
}) %>%
  bind_rows() %>%
  filter(grepl("^d_|^cong|as.fac", term)) %>%  # Keep terms starting with "d_" or "cong"
  mutate(
    ci_lower = estimate - 1.96 * std_error,
    ci_upper = estimate + 1.96 * std_error,
    term = case_when(
      term == "d_genderWoman" ~ "Woman",
      term == "as.factor(d_experience)4" ~ "Experience = 4 years",
      term == "as.factor(d_experience)12" ~ "Experience = 12 years",
      term == "cong_ccccCongruent" ~ "Climate Change: Congruent",
      term == "cong_hohoCongruent" ~ "Housing: Congruent",
      term == "cong_cocoCongruent" ~ "Cost of Living: Congruent",
      term == "cong_ininCongruent" ~ "Income redistribution: Congruent",
      term == "cong_dedeCongruent" ~ "Deficit: Congruent",
      term == "cong_heheCongruent" ~ "Healthcare: Congruent",
      term == "cong_PIDCongruent partisanship" ~ "Explicit Party Cue: In-party",
      TRUE ~ term
    ),
    term = factor(term, levels = c(
      "Woman",
      "Experience = 4 years",
      "Experience = 12 years",
      "Climate Change: Congruent",
      "Housing: Congruent",
      "Cost of Living: Congruent",
      "Income redistribution: Congruent",
      "Deficit: Congruent",
      "Healthcare: Congruent",
      "Explicit Party Cue: In-party"
    ))
  )

View(plot_data)

# Create the plot
p_attributes <- ggplot(plot_data, aes(x = estimate, y = term, color = group, shape = group)) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +  # Add vertical dodge to points
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), position = position_dodge(width = 0.5), height = 0) +  # Horizontal error bars with no caps
  scale_color_manual(values = c("Control" = "black", "Treatment" = "grey50")) +  
  scale_x_continuous(breaks = seq(-.75, 1.5, .25)) +
  labs(
    x = "Estimates"
  ) +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_text(size = 18),  # <- this line makes the x-label larger
    panel.grid.major = element_line(color = "gray90"),
    axis.text = element_text(size = 15),
    legend.text = element_text(size = 15)
  ) +
  geom_vline(xintercept = 0, linetype = "dashed")

print(p_attributes)

#try(ggsave("", p_attributes, width=8, height=8, units="in", dpi=300))


###############################################################################
                        ### Supporting Information ###
###############################################################################

################
### SI Section A: Demographics ->> see rows 1448-1713 below
################

################
### SI Section C: Descriptive: Policy Partisanship and Policy Congruence
################

### Figure C1: Joint distribution of policy congruence and policy partisanship

dat3_pid <- dat3 %>%
  dplyr::filter(!is.na(PID))

cols <- c("f_cc2_2", "f_co2_2", "f_de2_2", "f_he2_2", "f_ho2_2", "f_in2_2")

total_congruent <- sum(sapply(dat3_pid[cols], function(x) sum(grepl("Congruent", x, ignore.case = FALSE))))
pp_cong_match <- sum(sapply(dat3_pid[cols], function(x) sum(grepl("Congruent and partisan-matching", x, ignore.case = FALSE))))/total_congruent
pp_cong_nmatch <- sum(sapply(dat3_pid[cols], function(x) sum(grepl("Congruent and partisan-non-matching", x, ignore.case = FALSE))))/total_congruent
pp_cong_np <- sum(sapply(dat3_pid[cols], function(x) sum(grepl("Congruent and non-partisan", x, ignore.case = FALSE))))/total_congruent

total_incongruent <- sum(sapply(dat3_pid[cols], function(x) sum(grepl("Non-congruent", x, ignore.case = FALSE))))
pp_incong_match <- sum(sapply(dat3_pid[cols], function(x) sum(grepl("Non-congruent and partisan-matching", x, ignore.case = FALSE))))/total_incongruent
pp_incong_nmatch <- sum(sapply(dat3_pid[cols], function(x) sum(grepl("Non-congruent and partisan-non-matching", x, ignore.case = FALSE))))/total_incongruent
pp_incong_np <- sum(sapply(dat3_pid[cols], function(x) sum(grepl("Non-congruent and non-partisan", x, ignore.case = FALSE))))/total_incongruent

# total observations used in figure
N_congruent <- total_congruent
N_incongruent <- total_incongruent
N_total <- total_congruent + total_incongruent
print(N_total)
print(N_congruent)
print(N_incongruent)


d_projec2 <- data.frame(
  value = c(pp_cong_match, pp_cong_nmatch, pp_cong_np,
            pp_incong_match, pp_incong_nmatch, pp_incong_np),
  Congruence = rep(c("Congruent policies","Incongruent policies"), each = 3),
  Partisanship = rep(c("In-group", "Out-Group", "Non-partisan"), times = 2)
)

p_bar_projec <- ggplot(d_projec2, aes(x = Congruence, y = value, fill = Partisanship)) +
  geom_bar(stat = "identity", position = "fill") +
  geom_text(aes(label = scales::percent(value, accuracy = 1)),
            position = position_fill(vjust = 0.5),
            size = 4, color = "white") +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 10)) +
  labs(fill = "Partisanship",
       title = "",
       x = NULL,
       y = "Percentage") +
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.text.y = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 16),
        legend.key.size = unit(0.8, "cm")
  ) +
  scale_fill_manual(values = c("In-group" = "black",
                               "Out-Group" = "grey40",
                               "Non-partisan" = "grey60"))

print(p_bar_projec)

#try(ggsave("", p_bar_projec, width = 8, height = 5, units = "in", dpi = 300))

################
### SI Section D: Regression of Candidate Evaluation on Policy Congruence and Partisanship
################

### Table D1 -> See code in rows 255-262 above 

### Table D2 + Figure D1: 
### Regression of Candidate Choice on Policy Congruence and Policy Partisanship. 
### Dependent variable: Candidate Choice

### Test Hypothesis H1
h1_cont <- lm_robust(Y ~ d_gender + as.factor(d_experience) + f_cc2_2 + f_co2_2 + f_de2_2 + f_he2_2 + f_ho2_2 + f_in2_2 + PID + post_secondary + citizen + province + income + male,
                     data = dat3[dat3$Treatment == "No partisanship",], clusters = ResponseId)  

modelsummary(h1_cont, stars = TRUE)

# Plot
# Compute marginal effects 
cc_comp_robust <- marginaleffects::comparisons(h1_cont, variables = list(f_cc2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
cc_comp_avg_robust <- cc_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
View(cc_comp_avg_robust)
# Selecting the contrasts of interest
cc_contrast_npartisan <- -cc_comp_avg_robust$avg_estimate[4] # non-partisan
cc_contrast_partisan_match <- -cc_comp_avg_robust$avg_estimate[8] # in-party: putting minus sign because non-congruent is first and we want the increase in support when congruent compare to non-congruent (congruent minus non-congruent); we need to switch signs
cc_contrast_partisan_nmatch <- -cc_comp_avg_robust$avg_estimate[13] # out-party

# Confidence intervals
cc_low_npartisan <- -cc_comp_avg_robust$conf.low[4]
cc_low_partisan_match <- -cc_comp_avg_robust$conf.low[8]
cc_low_partisan_nmatch <- -cc_comp_avg_robust$conf.low[13]

cc_hi_npartisan <- -cc_comp_avg_robust$conf.high[4]
cc_hi_partisan_match <- -cc_comp_avg_robust$conf.high[8]
cc_hi_partisan_nmatch <- -cc_comp_avg_robust$conf.high[13]

# Cost of Living
co_comp_robust <- marginaleffects::comparisons(h1_cont, variables = list(f_co2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
co_comp_avg_robust <- co_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

co_contrast_npartisan <- -co_comp_avg_robust$avg_estimate[4]
co_contrast_partisan_match <- -co_comp_avg_robust$avg_estimate[8]
co_contrast_partisan_nmatch <- -co_comp_avg_robust$avg_estimate[13]

co_low_npartisan <- -co_comp_avg_robust$conf.low[4]
co_low_partisan_match <- -co_comp_avg_robust$conf.low[8]
co_low_partisan_nmatch <- -co_comp_avg_robust$conf.low[13]

co_hi_npartisan <- -co_comp_avg_robust$conf.high[4]
co_hi_partisan_match <- -co_comp_avg_robust$conf.high[8]
co_hi_partisan_nmatch <- -co_comp_avg_robust$conf.high[13]

# Deficit
de_comp_robust <- marginaleffects::comparisons(h1_cont, variables = list(f_de2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
de_comp_avg_robust <- de_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
de_contrast_npartisan <- -de_comp_avg_robust$avg_estimate[4]
de_contrast_partisan_match <- -de_comp_avg_robust$avg_estimate[8]
de_contrast_partisan_nmatch <- -de_comp_avg_robust$avg_estimate[13]

de_low_npartisan <- -de_comp_avg_robust$conf.low[4]
de_low_partisan_match <- -de_comp_avg_robust$conf.low[8]
de_low_partisan_nmatch <- -de_comp_avg_robust$conf.low[13]

de_hi_npartisan <- -de_comp_avg_robust$conf.high[4]
de_hi_partisan_match <- -de_comp_avg_robust$conf.high[8]
de_hi_partisan_nmatch <- -de_comp_avg_robust$conf.high[13]

# Healthcare
he_comp_robust <- marginaleffects::comparisons(h1_cont, variables = list(f_he2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
he_comp_avg_robust <- he_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
he_contrast_npartisan <- -he_comp_avg_robust$avg_estimate[4]
he_contrast_partisan_match <- -he_comp_avg_robust$avg_estimate[8]
he_contrast_partisan_nmatch <- -he_comp_avg_robust$avg_estimate[13]

he_low_npartisan <- -he_comp_avg_robust$conf.low[4]
he_low_partisan_match <- -he_comp_avg_robust$conf.low[8]
he_low_partisan_nmatch <- -he_comp_avg_robust$conf.low[13]

he_hi_npartisan <- -he_comp_avg_robust$conf.high[4]
he_hi_partisan_match <- -he_comp_avg_robust$conf.high[8]
he_hi_partisan_nmatch <- -he_comp_avg_robust$conf.high[13]

# Income inequality
in_comp_robust <- marginaleffects::comparisons(h1_cont, variables = list(f_in2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
in_comp_avg_robust <-  in_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
in_contrast_npartisan <- -in_comp_avg_robust$avg_estimate[4]
in_contrast_partisan_match <- -in_comp_avg_robust$avg_estimate[8]
in_contrast_partisan_nmatch <- -in_comp_avg_robust$avg_estimate[13]

in_low_npartisan <- -in_comp_avg_robust$conf.low[4]
in_low_partisan_match <- -in_comp_avg_robust$conf.low[8]
in_low_partisan_nmatch <- -in_comp_avg_robust$conf.low[13]

in_hi_npartisan <- -in_comp_avg_robust$conf.high[4]
in_hi_partisan_match <- -in_comp_avg_robust$conf.high[8]
in_hi_partisan_nmatch <- -in_comp_avg_robust$conf.high[13]

# Housing
ho_comp_robust <- marginaleffects::comparisons(h1_cont, variables = list(f_ho2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
ho_comp_avg_robust <- ho_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
ho_contrast_npartisan <- -ho_comp_avg_robust$avg_estimate[4]
ho_contrast_partisan_match <- -ho_comp_avg_robust$avg_estimate[8]
ho_contrast_partisan_nmatch <- -ho_comp_avg_robust$avg_estimate[13]

ho_low_npartisan <- -ho_comp_avg_robust$conf.low[4]
ho_low_partisan_match <- -ho_comp_avg_robust$conf.low[8]
ho_low_partisan_nmatch <- -ho_comp_avg_robust$conf.low[13]

ho_hi_npartisan <- -ho_comp_avg_robust$conf.high[4]
ho_hi_partisan_match <- -ho_comp_avg_robust$conf.high[8]
ho_hi_partisan_nmatch <- -ho_comp_avg_robust$conf.high[13]


# Create a dataset with the marginal contrasts for plotting
marginal_contrasts_cont_choice <- data.frame(
                policy = c("cc", "co", "de", "he", "ho", "in"),
                contrast_npartisan = c(cc_contrast_npartisan, co_contrast_npartisan, de_contrast_npartisan, he_contrast_npartisan, ho_contrast_npartisan, in_contrast_npartisan),
                contrast_partisan_match = c(cc_contrast_partisan_match, co_contrast_partisan_match, de_contrast_partisan_match, he_contrast_partisan_match, ho_contrast_partisan_match, in_contrast_partisan_match),
                contrast_partisan_nmatch = c(cc_contrast_partisan_nmatch, co_contrast_partisan_nmatch, de_contrast_partisan_nmatch, he_contrast_partisan_nmatch, ho_contrast_partisan_nmatch, in_contrast_partisan_nmatch),
                low_npartisan = c(cc_low_npartisan, co_low_npartisan, de_low_npartisan, he_low_npartisan, ho_low_npartisan, in_low_npartisan),
                low_partisan_match = c(cc_low_partisan_match, co_low_partisan_match, de_low_partisan_match, he_low_partisan_match, ho_low_partisan_match, in_low_partisan_match),
                low_partisan_nmatch = c(cc_low_partisan_nmatch, co_low_partisan_nmatch, de_low_partisan_nmatch, he_low_partisan_nmatch, ho_low_partisan_nmatch, in_low_partisan_nmatch),
                hi_npartisan = c(cc_hi_npartisan, co_hi_npartisan, de_hi_npartisan, he_hi_npartisan, ho_hi_npartisan, in_hi_npartisan),
                hi_partisan_match = c(cc_hi_partisan_match, co_hi_partisan_match, de_hi_partisan_match, he_hi_partisan_match, ho_hi_partisan_match, in_hi_partisan_match),
                hi_partisan_nmatch = c(cc_hi_partisan_nmatch, co_hi_partisan_nmatch, de_hi_partisan_nmatch, he_hi_partisan_nmatch, ho_hi_partisan_nmatch, in_hi_partisan_nmatch)
              )

# Plot marginal contrasts

# Create a new variable for dodging
marginal_contrasts_cont_choice <- marginal_contrasts_cont_choice %>%
  mutate(policy_dodged = ifelse(policy == "cc", 1,
                                ifelse(policy == "co", 2,
                                       ifelse(policy == "de", 3,
                                              ifelse(policy == "he", 4,
                                                     ifelse(policy == "ho", 5,
                                                            ifelse(policy == "in", 6, NA)))))))


lm_cont_choice <- ggplot(marginal_contrasts_cont_choice, aes(x = policy_dodged)) + 
  geom_pointrange(aes(y = contrast_partisan_match, 
                      ymin = low_partisan_match, 
                      ymax = hi_partisan_match, 
                      color = "In-party", 
                      shape = "In-party"), 
                  position = position_dodge(width = 0.5)) +
  geom_pointrange(aes(x = policy_dodged + 0.2, 
                      y = contrast_npartisan, 
                      ymin = low_npartisan, 
                      ymax = hi_npartisan, 
                      color = "Non-partisan", 
                      shape = "Non-partisan"), 
                  position = position_dodge(width = 0.5)) +
  geom_pointrange(aes(x = policy_dodged + 0.4, 
                      y = contrast_partisan_nmatch, 
                      ymin = low_partisan_nmatch, 
                      ymax = hi_partisan_nmatch, 
                      color = "Out-party", 
                      shape = "Out-party"), 
                  position = position_dodge(width = 0.5)) +
  theme_bw() +
  ylim(-0.5, 0.5) +
  theme(axis.text.x = element_text(angle =0, hjust = 0.5, size = 11),
        axis.text.y = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text = element_text(size = 11),
        legend.title = element_text(size = 13),
        legend.position = "bottom",
        axis.title.x = element_blank())+
  labs(y = "Average Marginal Effect of Congruence", 
       color = "Implicit Party Cue", 
       shape = "Implicit Party Cue") +
  scale_color_manual(values = c("In-party" = "black",       
                                "Non-partisan" = "grey40",    
                                "Out-party" = "grey60" )) +
  scale_shape_manual(values = c("In-party" = 16, 
                                "Non-partisan" = 17, 
                                "Out-party" = 15)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray30", linewidth = 0.5) +
  scale_x_continuous(breaks = 1:6, 
                     labels = c("Climate", "Cost of Living", "Deficit", 
                                "Healthcare", "Housing", "Income redistribution"))

print(lm_cont_choice)

#ggsave("", width=8, height=5, units="in", dpi=300)


### Analyses with non-partisans

# In all following models, using PID2 instead of PID (in which non-partisans and partisans of parties not included in study are coded as NA).
# In previous models, using PID resulted in excluding non-partisans and partisans of parties not included in study due to listwise deletion
# In following models, using PID2 allows to include non-partisans and partisans of parties not included in study in the analysis

## H1

### Table D3: Regression of Candidate Evaluation on Policy Congruence and Policy 
### Partisanship, Including Non-partisans and Partisans of Minor Parties (Control Group). 
### Dependent variable: Candidate Evaluation.

# Model for regression table
h1_cont_rate2 <- lm_robust(scale ~ d_gender + as.factor(d_experience) + f_cc2_2 + f_co2_2 + f_de2_2 + f_he2_2 + f_ho2_2 + f_in2_2 + PID2 + post_secondary + citizen + province + income + male, 
                           data = dat3[dat3$Treatment == "No partisanship",],
                           clusters = ResponseId)  

modelsummary(h1_cont_rate2, stars = TRUE)

## H2

### Figure D2: Average Marginal Effect of Policy Congruence on Candidate Evaluation 
### by Implicit Party Cue (Control Group), including Non-partisans and Partisans of Minor Parties

# Model for plot
lm_h1_cont_rate2 <- lm(scale ~ d_gender + as.factor(d_experience) + f_cc2_2 + f_co2_2 + f_de2_2 + f_he2_2 + f_ho2_2 + f_in2_2 + PID2 + post_secondary + citizen + province + income + male,
                       data = dat3[dat3$Treatment == "No partisanship",]) 

# Compute marginal effects using pairwise contrast between the levels of each policy partisanship variable (total = 6; one for each issue)

# Climate change

# Compute marginal effects 
cc_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate2, variables = list(f_cc2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
cc_comp_avg_robust <- cc_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

# Selecting the contrasts of interest
cc_contrast_npartisan <- -cc_comp_avg_robust$avg_estimate[4] # non-partisan
cc_contrast_partisan_match <- -cc_comp_avg_robust$avg_estimate[8] # in-party: putting minus sign because non-congruent is first and we want the increase in support when congruent compare to non-congruent (congruent minus non-congruent); we need to switch signs
cc_contrast_partisan_nmatch <- -cc_comp_avg_robust$avg_estimate[13] # out-party

# Confidence intervals
cc_low_npartisan <- -cc_comp_avg_robust$conf.low[4]
cc_low_partisan_match <- -cc_comp_avg_robust$conf.low[8]
cc_low_partisan_nmatch <- -cc_comp_avg_robust$conf.low[13]

cc_hi_npartisan <- -cc_comp_avg_robust$conf.high[4]
cc_hi_partisan_match <- -cc_comp_avg_robust$conf.high[8]
cc_hi_partisan_nmatch <- -cc_comp_avg_robust$conf.high[13]

# Cost of Living
co_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate2, variables = list(f_co2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
co_comp_avg_robust <- co_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )

co_contrast_npartisan <- -co_comp_avg_robust$avg_estimate[4]
co_contrast_partisan_match <- -co_comp_avg_robust$avg_estimate[8]
co_contrast_partisan_nmatch <- -co_comp_avg_robust$avg_estimate[13]

co_low_npartisan <- -co_comp_avg_robust$conf.low[4]
co_low_partisan_match <- -co_comp_avg_robust$conf.low[8]
co_low_partisan_nmatch <- -co_comp_avg_robust$conf.low[13]

co_hi_npartisan <- -co_comp_avg_robust$conf.high[4]
co_hi_partisan_match <- -co_comp_avg_robust$conf.high[8]
co_hi_partisan_nmatch <- -co_comp_avg_robust$conf.high[13]

# Deficit
de_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate2, variables = list(f_de2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
de_comp_avg_robust <- de_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
de_contrast_npartisan <- -de_comp_avg_robust$avg_estimate[4]
de_contrast_partisan_match <- -de_comp_avg_robust$avg_estimate[8]
de_contrast_partisan_nmatch <- -de_comp_avg_robust$avg_estimate[13]

de_low_npartisan <- -de_comp_avg_robust$conf.low[4]
de_low_partisan_match <- -de_comp_avg_robust$conf.low[8]
de_low_partisan_nmatch <- -de_comp_avg_robust$conf.low[13]

de_hi_npartisan <- -de_comp_avg_robust$conf.high[4]
de_hi_partisan_match <- -de_comp_avg_robust$conf.high[8]
de_hi_partisan_nmatch <- -de_comp_avg_robust$conf.high[13]

# Healthcare
he_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate2, variables = list(f_he2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
he_comp_avg_robust <- he_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
he_contrast_npartisan <- -he_comp_avg_robust$avg_estimate[4]
he_contrast_partisan_match <- -he_comp_avg_robust$avg_estimate[8]
he_contrast_partisan_nmatch <- -he_comp_avg_robust$avg_estimate[13]

he_low_npartisan <- -he_comp_avg_robust$conf.low[4]
he_low_partisan_match <- -he_comp_avg_robust$conf.low[8]
he_low_partisan_nmatch <- -he_comp_avg_robust$conf.low[13]

he_hi_npartisan <- -he_comp_avg_robust$conf.high[4]
he_hi_partisan_match <- -he_comp_avg_robust$conf.high[8]
he_hi_partisan_nmatch <- -he_comp_avg_robust$conf.high[13]

# Income inequality
in_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate2, variables = list(f_in2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
in_comp_avg_robust <-  in_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
in_contrast_npartisan <- -in_comp_avg_robust$avg_estimate[4]
in_contrast_partisan_match <- -in_comp_avg_robust$avg_estimate[8]
in_contrast_partisan_nmatch <- -in_comp_avg_robust$avg_estimate[13]

in_low_npartisan <- -in_comp_avg_robust$conf.low[4]
in_low_partisan_match <- -in_comp_avg_robust$conf.low[8]
in_low_partisan_nmatch <- -in_comp_avg_robust$conf.low[13]

in_hi_npartisan <- -in_comp_avg_robust$conf.high[4]
in_hi_partisan_match <- -in_comp_avg_robust$conf.high[8]
in_hi_partisan_nmatch <- -in_comp_avg_robust$conf.high[13]

# Housing
ho_comp_robust <- marginaleffects::comparisons(lm_h1_cont_rate2, variables = list(f_ho2_2 = "pairwise"), vcov = ~ResponseId) # robust standard errors
ho_comp_avg_robust <- ho_comp_robust |>
  dplyr::group_by(contrast) |>
  dplyr::summarise(
    avg_estimate = mean(estimate),
    avg_se = mean(std.error),
    conf.low = avg_estimate - 1.96 * avg_se,
    conf.high = avg_estimate + 1.96 * avg_se,
    .groups = "drop"
  )
ho_contrast_npartisan <- -ho_comp_avg_robust$avg_estimate[4]
ho_contrast_partisan_match <- -ho_comp_avg_robust$avg_estimate[8]
ho_contrast_partisan_nmatch <- -ho_comp_avg_robust$avg_estimate[13]

ho_low_npartisan <- -ho_comp_avg_robust$conf.low[4]
ho_low_partisan_match <- -ho_comp_avg_robust$conf.low[8]
ho_low_partisan_nmatch <- -ho_comp_avg_robust$conf.low[13]

ho_hi_npartisan <- -ho_comp_avg_robust$conf.high[4]
ho_hi_partisan_match <- -ho_comp_avg_robust$conf.high[8]
ho_hi_partisan_nmatch <- -ho_comp_avg_robust$conf.high[13]


# Create a dataset with the marginal contrasts for plotting

marginal_contrasts_cont_np <- data.frame(
  policy = c("cc", "co", "de", "he", "ho", "in"),
  contrast_npartisan = c(cc_contrast_npartisan, co_contrast_npartisan, de_contrast_npartisan, he_contrast_npartisan, ho_contrast_npartisan, in_contrast_npartisan),
  contrast_partisan_match = c(cc_contrast_partisan_match, co_contrast_partisan_match, de_contrast_partisan_match, he_contrast_partisan_match, ho_contrast_partisan_match, in_contrast_partisan_match),
  contrast_partisan_nmatch = c(cc_contrast_partisan_nmatch, co_contrast_partisan_nmatch, de_contrast_partisan_nmatch, he_contrast_partisan_nmatch, ho_contrast_partisan_nmatch, in_contrast_partisan_nmatch),
  low_npartisan = c(cc_low_npartisan, co_low_npartisan, de_low_npartisan, he_low_npartisan, ho_low_npartisan, in_low_npartisan),
  low_partisan_match = c(cc_low_partisan_match, co_low_partisan_match, de_low_partisan_match, he_low_partisan_match, ho_low_partisan_match, in_low_partisan_match),
  low_partisan_nmatch = c(cc_low_partisan_nmatch, co_low_partisan_nmatch, de_low_partisan_nmatch, he_low_partisan_nmatch, ho_low_partisan_nmatch, in_low_partisan_nmatch),
  hi_npartisan = c(cc_hi_npartisan, co_hi_npartisan, de_hi_npartisan, he_hi_npartisan, ho_hi_npartisan, in_hi_npartisan),
  hi_partisan_match = c(cc_hi_partisan_match, co_hi_partisan_match, de_hi_partisan_match, he_hi_partisan_match, ho_hi_partisan_match, in_hi_partisan_match),
  hi_partisan_nmatch = c(cc_hi_partisan_nmatch, co_hi_partisan_nmatch, de_hi_partisan_nmatch, he_hi_partisan_nmatch, ho_hi_partisan_nmatch, in_hi_partisan_nmatch)
)

# Plot marginal contrasts

# Create a new variable for dodging point estimates
marginal_contrasts_cont_np <- marginal_contrasts_cont_np %>%
  mutate(policy_dodged = ifelse(policy == "cc", 1,
                                ifelse(policy == "co", 2,
                                       ifelse(policy == "de", 3,
                                              ifelse(policy == "he", 4,
                                                     ifelse(policy == "ho", 5,
                                                            ifelse(policy == "in", 6, NA)))))))

# Plot Figure D2
lm_cont2 <- ggplot(marginal_contrasts_cont_np, aes(x = policy_dodged)) + 
  geom_pointrange(aes(y = contrast_partisan_match, 
                      ymin = low_partisan_match, 
                      ymax = hi_partisan_match, 
                      color = "In-party", 
                      shape = "In-party"), 
                  position = position_dodge(width = 0.5)) +
  geom_pointrange(aes(x = policy_dodged + 0.2, 
                      y = contrast_npartisan, 
                      ymin = low_npartisan, 
                      ymax = hi_npartisan, 
                      color = "Non-partisan", 
                      shape = "Non-partisan"), 
                  position = position_dodge(width = 0.5)) +
  geom_pointrange(aes(x = policy_dodged + 0.4, 
                      y = contrast_partisan_nmatch, 
                      ymin = low_partisan_nmatch, 
                      ymax = hi_partisan_nmatch, 
                      color = "Out-party", 
                      shape = "Out-party"), 
                  position = position_dodge(width = 0.5)) +
  theme_bw() +
  ylim(-1.2, 1.2) +
  theme(axis.text.x = element_text(angle =0, hjust = 0.5, size = 11),
        axis.text.y = element_text(size = 11),
        axis.title.y = element_text(size = 13),
        legend.text = element_text(size = 11),
        legend.title = element_text(size = 13),
        legend.position = "bottom",
        axis.title.x = element_blank())+
  labs(y = "Average Marginal Effect of Congruence", 
       color = "Implicit Party Cue", 
       shape = "Implicit Party Cue") +
  scale_color_manual(values = c("In-party" = "black",       
                                "Non-partisan" = "grey40",    
                                "Out-party" = "grey60" )) +
  scale_shape_manual(values = c("In-party" = 16, 
                                "Non-partisan" = 17, 
                                "Out-party" = 15)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray30", linewidth = 0.5) +
  scale_x_continuous(breaks = 1:6, 
                     labels = c("Climate", "Cost of Living", "Deficit", 
                                "Healthcare", "Housing", "Income redistribution"))
print(lm_cont2)

#ggsave("", width=8, height=5, units="in", dpi=300)

################
### SI Section E: Regression of Candidate Evaluation on Policy Congruence and 
### Treatment Assignment 
################


# Table E1:  Regression of Candidate Evaluation on Policy Congruence and Other 
### attributes. Dependent variable: Candidate evaluation. 

models_e1 <- list(
  "Treatment" = lm_robust(scale ~ d_gender + as.factor(d_experience) + cong_PID + cong_cc + cong_in + cong_co + cong_de + cong_ho + cong_he + PID + post_secondary + citizen + province + income + male,
                          data = dat3[dat3$Treatment == "Partisanship",],
                          clusters = ResponseId),
  "Control" = lm_robust(scale ~ d_gender + as.factor(d_experience) + cong_cc + cong_in + cong_co + cong_de + cong_ho + cong_he + PID + post_secondary + citizen + province + income + male,
                        data = dat3[dat3$Treatment == "No partisanship",],
                        clusters = ResponseId))

modelsummary(models_e1, stars = TRUE)

# Table E2: Regression of Candidate Evaluation on the Interaction of Policy 
### Congruence and Treatment Assignment. Dependent variable: Candidate evaluation. 

te2 <- lm_robust(scale ~ d_gender + as.factor(d_experience) + cong_cc*Treatment + cong_co*Treatment + cong_de*Treatment + cong_he*Treatment + cong_ho*Treatment + cong_in*Treatment + PID + post_secondary + citizen + province + income + male,
                          data = dat3,
                          clusters = ResponseId)

modelsummary(te2, stars = TRUE)



###  Table E3: Regression of Candidate Evaluation on Policy Congruence and Other
### Attributes Including Non-partisans and Partisans of Minor Parties. 
### Dependent variable: Candidate evaluation.

m_h2_all <- list(
  "Treatment" = lm_robust(scale ~ d_PID + d_gender + as.factor(d_experience) + cong_cc + cong_in + cong_co + cong_de + cong_ho + cong_he + PID2 + post_secondary + citizen + province + income + male,
                           data = dat3[dat3$Treatment == "Partisanship",],
                           clusters = ResponseId),
  "Control" = lm_robust(scale ~ d_gender + as.factor(d_experience) + cong_cc + cong_in + cong_co + cong_de + cong_ho + cong_he + PID2 + post_secondary + citizen + province + income + male,
                          data = dat3[dat3$Treatment == "No partisanship",],
                          clusters = ResponseId)  
)

modelsummary(m_h2_all, stars = TRUE)

### Figure E1: Average Marginal Effect of Policy Congruence and Other Conjoint 
### Attributes on Candidate Evalaution by Experimental Condition Including 
### Non-partisans and Partisans of Minor Parties 

# Mapping for relabeling terms
term_labels <- c(
  'd_genderWoman' = 'Woman',
  "as.factor(d_experience)4" = "Experience = 4 years",
  "as.factor(d_experience)12" = "Experience = 12 years",
  "cong_ccccCongruent" = "Climate Change: Congruent",
  "cong_hohoCongruent" = "Housing: Congruent",
  "cong_cocoCongruent" = "Cost of Living: Congruent",
  "cong_ininCongruent" = "Inequality: Congruent",
  "cong_dedeCongruent" = "Deficit: Congruent",
  "cong_heheCongruent" = "Healthcare: Congruent",
  "d_PIDLiberal" = "Explicit Party Cue: Liberal"
)

plot_data <- lapply(names(m_h2_all), function(group) {
  data.frame(
    group = group,
    term = names(m_h2_all[[group]]$coefficients),
    estimate = m_h2_all[[group]]$coefficients,
    std_error = m_h2_all[[group]]$std.error,
    p_value = m_h2_all[[group]]$p.value
  )
}) %>%
  bind_rows() %>%
  filter(grepl("^d_|^cong", term)) %>%  # Keep terms starting with "d_" or "cong"
  mutate(
    ci_lower = estimate - 1.96 * std_error,
    ci_upper = estimate + 1.96 * std_error,
    term = factor(term, levels = names(term_labels), labels = term_labels)  # Reorder and relabel
  ) |>
  filter(term != "Woman")

# Create the plot
p_attributes_choice <- ggplot(plot_data, aes(x = estimate, y = term, color = group, shape = group)) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +  # Add vertical dodge to points
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), position = position_dodge(width = 0.5), height = 0) +  # Horizontal error bars with no caps
                scale_color_manual(values = c("Control" = "black", "Treatment" = "grey50")) +  
                scale_x_continuous(breaks = seq(-.75, 1.5, .25)) +
                labs(
                  x = "Estimates"
                ) +
                theme_bw(base_size = 12) +
                theme(
                  legend.position = "bottom",
                  legend.title = element_blank(),
                  axis.title.y = element_blank(),
                  axis.title.x = element_text(size = 18),  # <- this line makes the x-label larger
                  panel.grid.major = element_line(color = "gray90"),
                  axis.text = element_text(size = 15),
                  legend.text = element_text(size = 15)
                ) +
                geom_vline(xintercept = 0, linetype = "dashed")
print(p_attributes_choice)

#try(ggsave("", p_attributes_choice, width=8, height=8, units="in", dpi=300))

### Table E4: Regression of Candidate Evaluation on the Interaction of Policy 
### Congruence and Treatment Assignment Including Non-partisans and Partisans 
### of Minor Parties. Dependent variable: Candidate evaluation.

# For testing H2 with interaction (Table E4) 
h2_rate <- lm_robust(scale ~ d_gender + as.factor(d_experience) + cong_cc*Treatment + cong_co*Treatment + cong_de*Treatment + cong_he*Treatment + cong_ho*Treatment + cong_in*Treatment + PID_all + post_secondary + citizen + province + income + male,
                     data = dat3,
                     clusters = ResponseId)  

modelsummary(h2_rate, stars = TRUE)

### Table E5: Regression of Candidate Choice on Policy Congruence and Policy 
### Partisanship. Dependent variable: Candidate choice. 

te5 <- list(
  "Treatment" = lm_robust(Y ~ d_gender + as.factor(d_experience) + cong_PID + cong_cc + cong_in + cong_co + cong_de + cong_ho + cong_he + PID + post_secondary + citizen + province + income + male,
                      data = dat3[dat3$Treatment == "Partisanship",],
                      clusters = ResponseId),
  "Control" = lm_robust(Y ~ d_gender + as.factor(d_experience) + cong_cc + cong_in + cong_co + cong_de + cong_ho + cong_he + PID + post_secondary + citizen + province + income + male,
                     data = dat3[dat3$Treatment == "No partisanship",],
                     clusters = ResponseId)
)

modelsummary(te5, stars = TRUE)

### Figure E2: Average Marginal Effect of Policy Congruence and Other Conjoint 
### Attributes on Candidate Choice by Experimental Condition. 
### Dependent variable: Candidate Choice.

term_labels <- c(
  'd_genderWoman' = 'Woman',
  "as.factor(d_experience)4" = "Experience = 4 years",
  "as.factor(d_experience)12" = "Experience = 12 years",
  "cong_ccccCongruent" = "Climate Change: Congruent",
  "cong_hohoCongruent" = "Housing: Congruent",
  "cong_cocoCongruent" = "Cost of Living: Congruent",
  "cong_ininCongruent" = "Inequality: Congruent",
  "cong_dedeCongruent" = "Deficit: Congruent",
  "cong_heheCongruent" = "Healthcare: Congruent",
  "cong_PIDCongruent partisanship" = "Explicit Party Cue: In-party"
)

plot_data <- lapply(names(te5), function(group) {
  data.frame(
    group = group,
    term = names(te5[[group]]$coefficients),
    estimate = te5[[group]]$coefficients,
    std_error = te5[[group]]$std.error,
    p_value = te5[[group]]$p.value
  )
}) %>%
  bind_rows() %>%
  filter(grepl("^d_|^cong", term)) %>%  # Keep terms starting with "d_" or "cong"
  mutate(
    ci_lower = estimate - 1.96 * std_error,
    ci_upper = estimate + 1.96 * std_error,
    term = factor(term, levels = names(term_labels), labels = term_labels)  # Reorder and relabel
  ) |>
  filter(term != "Woman")

# Create the plot
p_attributes_choice <- ggplot(plot_data, aes(x = estimate, y = term, color = group, shape = group)) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +  # Add vertical dodge to points
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), position = position_dodge(width = 0.5), height = 0) +  # Horizontal error bars with no caps
                scale_color_manual(values = c("Control" = "black", "Treatment" = "grey50")) +  
                scale_x_continuous(breaks = seq(-.75, 1.5, .25)) +
                labs(
                  x = "Estimates"
                ) +
                theme_bw(base_size = 12) +
                theme(
                  legend.position = "bottom",
                  legend.title = element_blank(),
                  axis.title.y = element_blank(),
                  axis.title.x = element_text(size = 18),  # <- this line makes the x-label larger
                  panel.grid.major = element_line(color = "gray90"),
                  axis.text = element_text(size = 15),
                  legend.text = element_text(size = 15)
                ) +
                geom_vline(xintercept = 0, linetype = "dashed")

print(p_attributes_choice)

#try(ggsave("", p_attributes_choice, width=8, height=8, units="in", dpi=300))

### Table E6: Regression of Candidate Choice on the Interaction of Policy 
### Congruence and Treatment Assignment. Dependent variable: Candidate choice.

h2 <- lm_robust(Y ~ d_gender + as.factor(d_experience) + cong_cc*Treatment + cong_co*Treatment + cong_de*Treatment + cong_he*Treatment + cong_ho*Treatment + cong_in*Treatment + PID + post_secondary + citizen + province + income + male,
                data = dat3,
                clusters = ResponseId)  

modelsummary(h2, stars = TRUE)

### Table E7: Regression of Candidate Evaluation on the Interaction of Policy 
### Congruence and Treatment Assignment excluding NDP Identifiers. 
### Dependent variable: Candidate evaluation.

h2_rate_ndp <- lm_robust(scale ~ d_gender + as.factor(d_experience) + cong_cc*Treatment + cong_co*Treatment + cong_de*Treatment + cong_he*Treatment + cong_ho*Treatment + cong_in*Treatment + PID + post_secondary + citizen + province + income + male,
                         data = dat3[dat3$PID != "NDP",],
                         clusters = ResponseId)  

modelsummary(h2_rate_ndp, stars = TRUE)


################
### SI Section A: Demographics
################

#custom function to compare marginal contrasts
compare_contrasts <- function(contrast_df, estimate_col = "avg_estimate", se_col = "avg_se", contrast_col = "contrast") {
  # Get column values
  ests <- contrast_df[[estimate_col]]
  ses <- contrast_df[[se_col]]
  labels <- contrast_df[[contrast_col]]
  
  # All unique row combinations
  combs <- combn(seq_along(ests), 2)
  
  # Compute stats
  pairwise <- apply(combs, 2, function(idx) {
    i <- idx[1]; j <- idx[2]
    diff <- ests[i] - ests[j]
    se_diff <- sqrt(ses[i]^2 + ses[j]^2)
    z <- diff / se_diff
    p <- 2 * (1 - pnorm(abs(z)))
    
    tibble::tibble(
      contrast1 = labels[i],
      contrast2 = labels[j],
      diff = diff,
      se_diff = se_diff,
      z = z,
      p = p
    )
  })
  
  # Return as a data frame
  dplyr::bind_rows(pairwise) |> dplyr::arrange(p)
}

### Table A1: Sample demographics and comparison to population benchmarks

#relabel the province variable

dat3$province <- factor(
  dat3$province,
  levels = 1:13,
  labels = c(
    "Alberta",
    "British Columbia",
    "Manitoba",
    "New Brunswick",
    "Newfoundland and Labrador",
    "Northwest Territories",
    "Nova Scotia",
    "Nunavut",
    "Ontario",
    "Prince Edward Island",
    "Quebec",
    "Saskatchewan",
    "Yukon"
  )
)

prop.table(table(dat3$province))

#relabel the gender variable

dat3$male <- factor(
  dat3$male,
  levels = 0:1,
  labels = c(
    "Female",
    "Male"
  )
)

prop.table(table(dat3$male))

#relabel the education variable

dat3$post_secondary <- factor(
  dat3$post_secondary,
  levels = 1:2,
  labels = c(
    "No post-secondary eduaction",
    "Post-secondary eduaction"
  )
)

prop.table(table(dat3$post_secondary))

table(dat3$PID2)
prop.table(table(dat3$PID2))

sample_props <- function(x, var_label = NULL) {
  out <- as.data.frame(prop.table(table(x)) * 100)
  names(out) <- c("Demographic Variable", "Sample Percentage")
  out$`Sample Percentage` <- round(out$`Sample Percentage`, 2)
  if (!is.null(var_label)) out$Group <- var_label
  out
}

d_prov_sample <- sample_props(dat3$province, "Province") %>%
  mutate(`Demographic Variable` = case_when(
    `Demographic Variable` == "British Columbia" ~ "BC",
    `Demographic Variable` == "Prince Edward Island" ~ "PEI",
    TRUE ~ `Demographic Variable`
  ))

d_gen_sample <- sample_props(dat3$male, "Gender") %>%
  mutate(`Demographic Variable` = case_when(
    `Demographic Variable` == "Male" ~ "Men",
    `Demographic Variable` == "Female" ~ "Women",
    TRUE ~ `Demographic Variable`
  ))

d_edu_sample <- sample_props(dat3$post_secondary, "Education")

d_pid_sample <- sample_props(dat3$PID2, "Party") %>%
  mutate(`Demographic Variable` = case_when(
    `Demographic Variable` == "Conservative" ~ "Conservative Party",
    `Demographic Variable` == "Liberal" ~ "Liberal Party",
    `Demographic Variable` == "NDP" ~ "New Democratic Party",
    `Demographic Variable` == "Non-partisan" ~ "Other/none",
    TRUE ~ `Demographic Variable`
  ))

d_table <- bind_rows(d_gen_sample, d_edu_sample, d_prov_sample, d_pid_sample) %>%
  select(-Group)
view(d_table)

# -------------------------------
# Load BENCHMARKS: Statistics Canada (province pop)
# -------------------------------
prov_pop <- read_csv("1710000901_databaseLoadingData.csv", show_col_types = FALSE) %>%
  filter(REF_DATE == "2023-07")

d_prov_pop <- prov_pop %>%
  mutate(
    pop_prop = (VALUE / VALUE[GEO == "Canada"] * 100),
    pop_prop = round(pop_prop, 2),
    GEO = case_when(
      GEO == "Prince Edward Island" ~ "PEI",
      GEO == "British Columbia" ~ "BC",
      TRUE ~ GEO
    )
  ) %>%
  select(GEO, pop_prop) %>%
  filter(GEO != "Canada") %>%
  rename(
    "Demographic Variable" = GEO,
    "Benchmark Percentage" = pop_prop
  )

# -------------------------------
# BENCHMARKS: Statistics Canada (gender)
# -------------------------------
gen_pop <- read_csv("1710000501_databaseLoadingData.csv", show_col_types = FALSE) %>%
  filter(`Age group` == "All ages", REF_DATE == 2023)

d_gen <- gen_pop %>%
  mutate(
    gen_prop = (VALUE / VALUE[Gender == "Total - gender"] * 100),
    gen_prop = round(gen_prop, 2),
    Gender = case_when(
      Gender == "Men+" ~ "Men",
      Gender == "Women+" ~ "Women",
      TRUE ~ Gender
    )
  ) %>%
  select(Gender, gen_prop) %>%
  filter(Gender != "Total - gender") %>%
  rename(
    "Demographic Variable" = Gender,
    "Benchmark Percentage" = gen_prop
  )

d_stat_can <- bind_rows(d_prov_pop, d_gen)
view(d_stat_can)

# -------------------------------
# BENCHMARKS: CES Education (2-category) + party id
# Post-secondary education vs No post-secondary education
# Party benchmark: collapse BQ+GPC+PPC+Other/none into "Other/none"
# -------------------------------
ces <- read_dta("2021 Canadian Election Study v2.0.dta")

d_ces_pid <- ces %>%
  mutate(
    pid_collapsed = case_when(
      cps21_fed_id == 1 ~ "Liberal Party",          # LPC
      cps21_fed_id == 2 ~ "Conservative Party",     # CPC
      cps21_fed_id == 3 ~ "New Democratic Party",   # NDP
      TRUE ~ "Other/none"                                # BQ, GPC, PPC, other/none, etc.
    )
  ) %>%
  count(pid_collapsed) %>%
  mutate(
    `Demographic Variable` = pid_collapsed,
    `Benchmark Percentage` = round(n / sum(n) * 100, 2)
  ) %>%
  select(`Demographic Variable`, `Benchmark Percentage`)


d_ces_educ <- ces %>%
  mutate(
    educ_2cat = case_when(
      cps21_education > 8 & cps21_education <= 11 ~ "Post-secondary eduaction",
      cps21_education <= 8 ~ "No post-secondary eduaction",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(educ_2cat)) %>%
  count(educ_2cat) %>%
  mutate(
    `Demographic Variable` = educ_2cat,
    `Benchmark Percentage` = round(n / sum(n) * 100, 2)
  ) %>%
  select(`Demographic Variable`, `Benchmark Percentage`)

# Combine all benchmarks
d_comp <- bind_rows(d_stat_can, d_ces_educ, d_ces_pid)
view(d_comp)

# -------------------------------
# Merge sample + benchmarks
# -------------------------------

# Recode benchmark party abbreviations to full names (as in your draft)
merged_data <- d_table %>%
  full_join(d_comp, by = "Demographic Variable") %>%
  mutate(
    `Demographic Variable` = case_when(
      `Demographic Variable` == "BQ"  ~ "Bloc Québécois",
      `Demographic Variable` == "CPC" ~ "Conservative Party",
      `Demographic Variable` == "LPC" ~ "Liberal Party",
      `Demographic Variable` == "GPC" ~ "Green Party",
      `Demographic Variable` == "NDP" ~ "New Democratic Party",
      TRUE ~ `Demographic Variable`
    )
  )

demographic_order <- c(
  "Men", "Women",
  "No post-secondary eduaction", "Post-secondary eduaction",
  "Alberta", "BC", "Manitoba", "Saskatchewan", "Ontario", "Quebec",
  "PEI", "Nova Scotia", "New Brunswick", "Newfoundland and Labrador",
  "Yukon", "Northwest Territories", "Nunavut",
  "Liberal Party", "Conservative Party",  "New Democratic Party", "Other/none"
)

merged_data <- merged_data %>%
  mutate(`Demographic Variable` = factor(`Demographic Variable`, levels = demographic_order)) %>%
  arrange(`Demographic Variable`) %>%
  mutate(`Demographic Variable` = as.character(`Demographic Variable`)) %>%
  select(`Demographic Variable`, `Sample Percentage`, `Benchmark Percentage`)

view(merged_data)
# -------------------------------
# Create tinytable + export to Word
# -------------------------------

tt(merged_data,
   caption = "Demographic Table",
   notes = "Note: Benchmarks from the 2021 Canadian Election Study (education and party identification) and Statistics Canada (province, age, gender). Sample data is from the Omnibus Survey.") %>%
  style_tt(j = 2:3, align = "c") %>%
  save_tt("demographics.html")

rmarkdown::pandoc_convert("demographics.html", to = "rtf", output = "demographics.rtf")

################################## End of File #################################



